\relax 
% Compatibility preamble: lets the records below be read both when
% \hyper@anchor is defined and when it is not (presumably: with and without
% hyperref loaded -- confirm against the hyperref sources).
% No-op fallback: takes two arguments and expands to nothing.
\providecommand\hyper@newdestlabel[2]{}
% Fallback: if the hook is not defined yet, route its body to \AtBeginDocument.
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
% Everything inside the hook only runs when \hyper@anchor is undefined.
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
% Save the current \contentsline, then install a four-argument version that
% forwards the first three arguments and drops the fourth.
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
% Save the current \newlabel, then install a version that hands its second
% argument unbraced to \newlabelxx, so the five inner groups become #2..#6.
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
% Only the first two of the five fields are passed on to the saved \newlabel.
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
% At the end of the document, restore both saved macros, again only if
% \hyper@anchor is still undefined at that point.
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
% Reset \hyper@last to \relax globally.
\global\let\hyper@last\relax 
% From here on the hook executes its argument immediately instead of deferring it.
\gdef\HyperFirstAtBeginDocument#1{#1}
% If \rEfLiNK has the same meaning as the (presumably undefined) \UnDef, define
% it globally to discard its first argument (the anchor) and keep the second.
\ifx\rEfLiNK\UnDef\gdef \rEfLiNK#1#2{#2}\fi
% Cross-reference and citation records.
% NOTE(review): these look machine-generated (presumably by a TeX4ht run, given
% the \rEfLiNK / :autoref wrappers); manual edits are presumably overwritten on
% the next compilation -- fix wording or label names in the .tex source instead.
%
% Record layout: \newlabel{<key>}{{<f1>}{<f2>}{<f3>}{<f4>}{<f5>}}
%   <f1>..<f3> each have the form
%       \rEfLiNK{<anchor>}{\csname :autoref\endcsname{<type>}<value>}
%   where <value> is, in order, the printed number, a second number
%   (presumably the page or an output position -- TODO confirm), and the
%   section title or caption text.
%   <f4> is a name of the form <type>.<n>; <f5> is empty in every record here.
% NOTE(review): <f4> ends in ".1" for every record (section.1, subsection.1,
% figure.1, table.1) regardless of the number in <f1>; confirm this is intended
% and does not make distinct labels share one destination.
%
% \citation{<key>} lines record the keys cited in the document, in order of appearance.
\newlabel{sec:introduction}{{\rEfLiNK{x1-20001}{\csname :autoref\endcsname{section}1}}{\rEfLiNK{x1-20001}{\csname :autoref\endcsname{section}4}}{\rEfLiNK{x1-20001}{\csname :autoref\endcsname{section}Introduction}}{section.1}{}}
% Fallback for \caption@xref (two arguments, only #1 is used), defined only if
% no definition exists yet.
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{fig:learning_model}{{\rEfLiNK{x1-2001r1}{\csname :autoref\endcsname{figure}1}}{\rEfLiNK{x1-2001r1}{\csname :autoref\endcsname{figure}6}}{\rEfLiNK{x1-2001r1}{\csname :autoref\endcsname{figure}A simplified diagram of the general model building procedure for pattern classification.}}{figure.1}{}}
\citation{rish2001empirical}
\citation{domingos1997optimality}
\citation{kazmierska2008application}
\citation{wang2007naive}
\citation{sahami1998bayesian}
\newlabel{sec:naive_bayes_classification}{{\rEfLiNK{x1-30002}{\csname :autoref\endcsname{section}2}}{\rEfLiNK{x1-30002}{\csname :autoref\endcsname{section}7}}{\rEfLiNK{x1-30002}{\csname :autoref\endcsname{section}Naive Bayes Classification}}{section.1}{}}
\newlabel{sec:overview}{{\rEfLiNK{x1-40002.1}{\csname :autoref\endcsname{subsection}2.1}}{\rEfLiNK{x1-40002.1}{\csname :autoref\endcsname{subsection}7}}{\rEfLiNK{x1-40002.1}{\csname :autoref\endcsname{subsection}Overview}}{subsection.1}{}}
\newlabel{fig:nonlinear_probs}{{\rEfLiNK{x1-4001r2}{\csname :autoref\endcsname{figure}2}}{\rEfLiNK{x1-4001r2}{\csname :autoref\endcsname{figure}9}}{\rEfLiNK{x1-4001r2}{\csname :autoref\endcsname{figure}Linear (A) vs. non-linear problems (B). Random samples for two different classes are shown as colored spheres, and the dotted lines indicate the class boundaries that classifiers try to approximate by computing the decision boundaries. A non-linear problem (B) would be a case where linear classifiers, such as naive Bayes, would not be suitable since the classes are not linearly separable. In such a scenario, non-linear classifiers (e.g.,instance-based nearest neighbor classifiers) should be preferred.}}{figure.1}{}}
\newlabel{sec:posterior_probabilities_1}{{\rEfLiNK{x1-50002.2}{\csname :autoref\endcsname{subsection}2.2}}{\rEfLiNK{x1-50002.2}{\csname :autoref\endcsname{subsection}10}}{\rEfLiNK{x1-50002.2}{\csname :autoref\endcsname{subsection}Posterior Probabilities}}{subsection.1}{}}
\newlabel{sec:class-conditional_probabilities_1}{{\rEfLiNK{x1-60002.3}{\csname :autoref\endcsname{subsection}2.3}}{\rEfLiNK{x1-60002.3}{\csname :autoref\endcsname{subsection}12}}{\rEfLiNK{x1-60002.3}{\csname :autoref\endcsname{subsection}Class-conditional Probabilities}}{subsection.1}{}}
\citation{zhang2004optimality}
\newlabel{sec:prior_probabilities_1}{{\rEfLiNK{x1-70002.4}{\csname :autoref\endcsname{subsection}2.4}}{\rEfLiNK{x1-70002.4}{\csname :autoref\endcsname{subsection}15}}{\rEfLiNK{x1-70002.4}{\csname :autoref\endcsname{subsection}Prior Probabilities}}{subsection.1}{}}
\newlabel{fig:effect_priors}{{\rEfLiNK{x1-7008r3}{\csname :autoref\endcsname{figure}3}}{\rEfLiNK{x1-7008r3}{\csname :autoref\endcsname{figure}20}}{\rEfLiNK{x1-7008r3}{\csname :autoref\endcsname{figure}The effect of prior probabilities on the decision regions. The figure shows an 1-dimensional random sample from two different classes (blue and green crosses). The data points of both the blue and the green class are normally distributed with standard deviation 1, and the bell curves denote the class-conditional probabilities. If the class priors are equal, the decision boundary of a naive Bayes classifier is placed at the center between both distributions (gray bar). An increase of the prior probability of the blue class ($\omega _1$) leads to an extension of the decision region R1 by moving the decision boundary (blue-dotted bar) towards the other class and vice versa.}}{figure.1}{}}
\newlabel{sec:evidence}{{\rEfLiNK{x1-80002.5}{\csname :autoref\endcsname{subsection}2.5}}{\rEfLiNK{x1-80002.5}{\csname :autoref\endcsname{subsection}21}}{\rEfLiNK{x1-80002.5}{\csname :autoref\endcsname{subsection}Evidence}}{subsection.1}{}}
\newlabel{sec:multinomial_naive_bayes-a_toy_example}{{\rEfLiNK{x1-90002.6}{\csname :autoref\endcsname{subsection}2.6}}{\rEfLiNK{x1-90002.6}{\csname :autoref\endcsname{subsection}23}}{\rEfLiNK{x1-90002.6}{\csname :autoref\endcsname{subsection}Multinomial Naive Bayes - A Toy Example}}{subsection.1}{}}
\newlabel{fig:toy_dataset}{{\rEfLiNK{x1-9001r4}{\csname :autoref\endcsname{figure}4}}{\rEfLiNK{x1-9001r4}{\csname :autoref\endcsname{figure}25}}{\rEfLiNK{x1-9001r4}{\csname :autoref\endcsname{figure}A simple toy dataset of 12 samples 2 different classes $+, -$ . Each sample consists of 2 features: color and geometrical shape.}}{figure.1}{}}
% The caption text of the next record itself contains a reference to
% fig:toy_dataset (via \Protect \::ref).
\newlabel{fig:new_sample1}{{\rEfLiNK{x1-9003r5}{\csname :autoref\endcsname{figure}5}}{\rEfLiNK{x1-9003r5}{\csname :autoref\endcsname{figure}28}}{\rEfLiNK{x1-9003r5}{\csname :autoref\endcsname{figure}A new sample from class $+$ and the features $\textbf  {x} = \text  {[blue, square]}$ that is to be classified using the training data in Figure \let \prOteCt \relax \let \prOteCt \relax \Protect \::ref {fig:toy_dataset}.}}{figure.1}{}}
\newlabel{sec:additive_smoothing}{{\rEfLiNK{x1-120002.6.3}{\csname :autoref\endcsname{subsubsection}2.6.3}}{\rEfLiNK{x1-120002.6.3}{\csname :autoref\endcsname{subsubsection}32}}{\rEfLiNK{x1-120002.6.3}{\csname :autoref\endcsname{subsubsection}Additive Smoothing}}{subsubsection.1}{}}
% As above: this caption also embeds a reference to fig:toy_dataset.
\newlabel{fig:new_sample2}{{\rEfLiNK{x1-12001r6}{\csname :autoref\endcsname{figure}6}}{\rEfLiNK{x1-12001r6}{\csname :autoref\endcsname{figure}34}}{\rEfLiNK{x1-12001r6}{\csname :autoref\endcsname{figure}A new sample from class $+$ and the features $\textbf  {x} = \text  {[yellow, square]}$ that is to be classified using the training data in Figure \let \prOteCt \relax \let \prOteCt \relax \Protect \::ref {fig:toy_dataset}.}}{figure.1}{}}
\citation{yao2013rotation}
\newlabel{sec:the_bag_of_words_model}{{\rEfLiNK{x1-140003.1}{\csname :autoref\endcsname{subsection}3.1}}{\rEfLiNK{x1-140003.1}{\csname :autoref\endcsname{subsection}36}}{\rEfLiNK{x1-140003.1}{\csname :autoref\endcsname{subsection}The Bag of Words Model}}{subsection.1}{}}
% NOTE(review): the key uses the "fig:" prefix but the record type is "table";
% any rename has to happen in the .tex source, together with all references.
\newlabel{fig:bag_of_words}{{\rEfLiNK{x1-14002r1}{\csname :autoref\endcsname{table}1}}{\rEfLiNK{x1-14002r1}{\csname :autoref\endcsname{table}40}}{\rEfLiNK{x1-14002r1}{\csname :autoref\endcsname{table}Bag of words representation of two sample documents $D_1$ and $D_2$.}}{table.1}{}}
\newlabel{sec:tokenization}{{\rEfLiNK{x1-150003.1.1}{\csname :autoref\endcsname{subsubsection}3.1.1}}{\rEfLiNK{x1-150003.1.1}{\csname :autoref\endcsname{subsubsection}41}}{\rEfLiNK{x1-150003.1.1}{\csname :autoref\endcsname{subsubsection}Tokenization}}{subsubsection.1}{}}
\newlabel{sec:stopwords}{{\rEfLiNK{x1-160003.1.2}{\csname :autoref\endcsname{subsubsection}3.1.2}}{\rEfLiNK{x1-160003.1.2}{\csname :autoref\endcsname{subsubsection}44}}{\rEfLiNK{x1-160003.1.2}{\csname :autoref\endcsname{subsubsection}Stop Words}}{subsubsection.1}{}}
\citation{porter1980algorithm}
\newlabel{sec:stemming_and_lemmatization}{{\rEfLiNK{x1-170003.1.3}{\csname :autoref\endcsname{subsubsection}3.1.3}}{\rEfLiNK{x1-170003.1.3}{\csname :autoref\endcsname{subsubsection}47}}{\rEfLiNK{x1-170003.1.3}{\csname :autoref\endcsname{subsubsection}Stemming and Lemmatization}}{subsubsection.1}{}}
\citation{toman2006influence}
\citation{zevcevic2011n}
\citation{kevselj2003n}
\citation{kanaris2007words}
\newlabel{sec:n-grams}{{\rEfLiNK{x1-180003.1.4}{\csname :autoref\endcsname{subsubsection}3.1.4}}{\rEfLiNK{x1-180003.1.4}{\csname :autoref\endcsname{subsubsection}53}}{\rEfLiNK{x1-180003.1.4}{\csname :autoref\endcsname{subsubsection}\emph  {N}-grams}}{subsubsection.1}{}}
\newlabel{sec:decision_rule_spam}{{\rEfLiNK{x1-190003.2}{\csname :autoref\endcsname{subsection}3.2}}{\rEfLiNK{x1-190003.2}{\csname :autoref\endcsname{subsection}53}}{\rEfLiNK{x1-190003.2}{\csname :autoref\endcsname{subsection}The Decision Rule for Spam Classification}}{subsection.1}{}}
\newlabel{sec:bernoulli_bayes}{{\rEfLiNK{x1-200003.3}{\csname :autoref\endcsname{subsection}3.3}}{\rEfLiNK{x1-200003.3}{\csname :autoref\endcsname{subsection}55}}{\rEfLiNK{x1-200003.3}{\csname :autoref\endcsname{subsection}Multi-variate Bernoulli Naive Bayes}}{subsection.1}{}}
\newlabel{sec:multinomial_bayes}{{\rEfLiNK{x1-210003.4}{\csname :autoref\endcsname{subsection}3.4}}{\rEfLiNK{x1-210003.4}{\csname :autoref\endcsname{subsection}56}}{\rEfLiNK{x1-210003.4}{\csname :autoref\endcsname{subsection}Multinomial Naive Bayes}}{subsection.1}{}}
\newlabel{sec:term_frequency}{{\rEfLiNK{x1-220003.4.1}{\csname :autoref\endcsname{subsubsection}3.4.1}}{\rEfLiNK{x1-220003.4.1}{\csname :autoref\endcsname{subsubsection}57}}{\rEfLiNK{x1-220003.4.1}{\csname :autoref\endcsname{subsubsection}Term Frequency}}{subsubsection.1}{}}
\newlabel{sec:tf-idf}{{\rEfLiNK{x1-230003.4.2}{\csname :autoref\endcsname{subsubsection}3.4.2}}{\rEfLiNK{x1-230003.4.2}{\csname :autoref\endcsname{subsubsection}59}}{\rEfLiNK{x1-230003.4.2}{\csname :autoref\endcsname{subsubsection}Term Frequency - Inverse Document Frequency (Tf-idf)}}{subsubsection.1}{}}
\citation{mccallum1998comparison}
\citation{rudner2002automated}
\newlabel{sec:naive_bayes_variants}{{\rEfLiNK{x1-250004}{\csname :autoref\endcsname{section}4}}{\rEfLiNK{x1-250004}{\csname :autoref\endcsname{section}60}}{\rEfLiNK{x1-250004}{\csname :autoref\endcsname{section}Variants of the Naive Bayes Model}}{section.1}{}}
\newlabel{sec:continuous_variables}{{\rEfLiNK{x1-260004.1}{\csname :autoref\endcsname{subsection}4.1}}{\rEfLiNK{x1-260004.1}{\csname :autoref\endcsname{subsection}61}}{\rEfLiNK{x1-260004.1}{\csname :autoref\endcsname{subsection}Continuous Variables}}{subsection.1}{}}
% Bibliography records.
% \bibdata gives the bibliography database path (no extension) and \bibstyle
% the style name; both are presumably consumed by BibTeX rather than by LaTeX.
\bibdata{../references/bayes}
% \bibcite{<key>}{<n>} pairs each citation key with its printed number. The
% numbers 1..14 follow the order in which the keys first appear in the
% \citation records of this file.
\bibcite{rish2001empirical}{1}
\bibcite{domingos1997optimality}{2}
% A section label record interleaved with the bibliography entries; it uses the
% same five-field \newlabel layout as the other label records in this file.
\newlabel{sec:eager_and_lazy}{{\rEfLiNK{x1-270004.2}{\csname :autoref\endcsname{subsection}4.2}}{\rEfLiNK{x1-270004.2}{\csname :autoref\endcsname{subsection}62}}{\rEfLiNK{x1-270004.2}{\csname :autoref\endcsname{subsection}Eager and Lazy Learning Algorithms}}{subsection.1}{}}
\bibcite{kazmierska2008application}{3}
\bibcite{wang2007naive}{4}
\bibcite{sahami1998bayesian}{5}
\bibcite{zhang2004optimality}{6}
\bibcite{yao2013rotation}{7}
\bibcite{porter1980algorithm}{8}
\bibcite{toman2006influence}{9}
\bibcite{zevcevic2011n}{10}
\bibcite{kevselj2003n}{11}
\bibcite{kanaris2007words}{12}
\bibcite{mccallum1998comparison}{13}
\bibcite{rudner2002automated}{14}
\bibstyle{unsrt}
